import json
import os
from config import PROMPT_TEMPLATE


def create_batch_file(
    text_df,
    output_dir: str = "batch_files",
    *,
    prompt_template=None,
    model: str = "deepseek-ai/DeepSeek-R1",
    max_tokens: int = 1514,
) -> str:
    """Write a JSONL batch-request file with one chat-completion request per row.

    Each row of ``text_df`` becomes one JSON object on its own line, holding
    a ``custom_id``, the HTTP method/URL of the chat-completions endpoint and
    a request body with a fixed system message followed by a user message
    rendered from the prompt template.

    Args:
        text_df: Table exposing ``iterrows()`` whose rows provide the columns
            ``'全局行号'`` (used to build ``custom_id``), ``'英文内容'`` and
            ``'中文内容'`` (substituted into the prompt template).
        output_dir: Directory that receives the file; created if missing.
        prompt_template: ``str.format`` template with the named fields
            ``source_lang`` and ``translation``. Defaults to the module-level
            ``PROMPT_TEMPLATE`` when ``None``.
        model: Model identifier placed in every request body.
        max_tokens: ``max_tokens`` value placed in every request body.

    Returns:
        Path of the written file, ``<output_dir>/text_content_batch.jsonl``.

    Raises:
        KeyError: If a row lacks one of the required columns, or the template
            references a field other than ``source_lang``/``translation``.
    """
    if prompt_template is None:
        prompt_template = PROMPT_TEMPLATE

    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, "text_content_batch.jsonl")

    # The system message is identical for every request, so build it once.
    system_message = {
        "role": "system",
        "content": "您是一位精通从英文到中文翻译的专业语言学家。您将获得一段港股招股书源文本及其译文，您的任务是校对译文。",
    }

    # Serialize every request before opening the file: if a row fails to
    # format, an existing batch file is left untouched rather than truncated.
    lines = []
    for _, row in text_df.iterrows():
        # NOTE: the template field is named ``source_lang`` but it receives
        # the English source *text*, not a language name.
        prompt = prompt_template.format(
            source_lang=row['英文内容'],
            translation=row['中文内容'],
        )
        request = {
            "custom_id": f"request-{row['全局行号']}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": model,
                "messages": [
                    system_message,
                    {"role": "user", "content": prompt},
                ],
                "max_tokens": max_tokens,
            },
        }
        # ensure_ascii=False keeps the Chinese text readable in the file.
        lines.append(json.dumps(request, ensure_ascii=False) + '\n')

    with open(output_path, 'w', encoding='utf-8') as f:
        f.writelines(lines)

    return output_path